1 /* 2 Egothor Software License version 1.00 3 Copyright (C) 1997-2004 Leo Galambos. 4 Copyright (C) 2002-2004 "Egothor developers" 5 on behalf of the Egothor Project. 6 All rights reserved. 7 8 This software is copyrighted by the "Egothor developers". If this 9 license applies to a single file or document, the "Egothor developers" 10 are the people or entities mentioned as copyright holders in that file 11 or document. If this license applies to the Egothor project as a 12 whole, the copyright holders are the people or entities mentioned in 13 the file CREDITS. This file can be found in the same location as this 14 license in the distribution. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions are 18 met: 19 1. Redistributions of source code must retain the above copyright 20 notice, the list of contributors, this list of conditions, and the 21 following disclaimer. 22 2. Redistributions in binary form must reproduce the above copyright 23 notice, the list of contributors, this list of conditions, and the 24 disclaimer that follows these conditions in the documentation 25 and/or other materials provided with the distribution. 26 3. The name "Egothor" must not be used to endorse or promote products 27 derived from this software without prior written permission. For 28 written permission, please contact Leo.G@seznam.cz 29 4. Products derived from this software may not be called "Egothor", 30 nor may "Egothor" appear in their name, without prior written 31 permission from Leo.G@seznam.cz. 32 33 In addition, we request that you include in the end-user documentation 34 provided with the redistribution and/or in the software itself an 35 acknowledgement equivalent to the following: 36 "This product includes software developed by the Egothor Project. 37 http://egothor.sf.net/" 38 39 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 40 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 41 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 42 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE 43 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 44 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 45 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 46 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 47 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 48 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 49 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 50 51 This software consists of voluntary contributions made by many 52 individuals on behalf of the Egothor Project and was originally 53 created by Leo Galambos (Leo.G@seznam.cz). 54 */ 55 package org.egothor.stemmer; 56 57 /** 58 * The Diff object generates a patch string. 59 * <p> 60 * A patch string is actually a command to a stemmer telling it how to reduce a 61 * word to its root. For example, to reduce the word teacher to its root teach 62 * the patch string Db would be generated. This command tells the stemmer to 63 * delete the last 2 characters from the word teacher to reach the stem (the 64 * patch commands are applied starting from the last character in order to save 65 */ 66 public class Diff { 67 int sizex = 0; 68 int sizey = 0; 69 int net[][]; 70 int way[][]; 71 72 int INSERT; 73 int DELETE; 74 int REPLACE; 75 int NOOP; 76 77 /** 78 * Constructor for the Diff object. 79 */ 80 public Diff() { 81 this(1, 1, 1, 0); 82 } 83 84 /** 85 * Constructor for the Diff object 86 * 87 * @param ins Description of the Parameter 88 * @param del Description of the Parameter 89 * @param rep Description of the Parameter 90 * @param noop Description of the Parameter 91 */ 92 public Diff(int ins, int del, int rep, int noop) { 93 INSERT = ins; 94 DELETE = del; 95 REPLACE = rep; 96 NOOP = noop; 97 } 98 99 /** 100 * Apply the given patch string <tt>diff</tt> to the given string <tt> 101 * dest</tt>. 102 * 103 * @param dest Destination string 104 * @param diff Patch string 105 */ 106 public static void apply(StringBuilder dest, CharSequence diff) { 107 try { 108 109 if (diff == null) { 110 return; 111 } 112 113 int pos = dest.length() - 1; 114 if (pos < 0) { 115 return; 116 } 117 // orig == "" 118 for (int i = 0; i < diff.length() / 2; i++) { 119 char cmd = diff.charAt(2 * i); 120 char param = diff.charAt(2 * i + 1); 121 int par_num = (param - 'a' + 1); 122 switch (cmd) { 123 case '-': 124 pos = pos - par_num + 1; 125 break; 126 case 'R': 127 dest.setCharAt(pos, param); 128 break; 129 case 'D': 130 int o = pos; 131 pos -= par_num - 1; 132 /* 133 * delete par_num chars from index pos 134 */ 135 // String s = orig.toString(); 136 // s = s.substring( 0, pos ) + s.substring( o + 1 ); 137 // orig = new StringBuffer( s ); 138 dest.delete(pos, o + 1); 139 break; 140 case 'I': 141 dest.insert(pos += 1, param); 142 break; 143 } 144 pos--; 145 } 146 } catch (StringIndexOutOfBoundsException x) { 147 // x.printStackTrace(); 148 } catch (ArrayIndexOutOfBoundsException x) { 149 // x.printStackTrace(); 150 } 151 } 152 153 /** 154 * Construct a patch string that transforms a to b. 155 * 156 * @param a String 1st string 157 * @param b String 2nd string 158 * @return String 159 */ 160 public synchronized String exec(String a, String b) { 161 if (a == null || b == null) { 162 return null; 163 } 164 165 int x; 166 int y; 167 int maxx; 168 int maxy; 169 int go[] = new int[4]; 170 final int X = 1; 171 final int Y = 2; 172 final int R = 3; 173 final int D = 0; 174 175 /* 176 * setup memory if needed => processing speed up 177 */ 178 maxx = a.length() + 1; 179 maxy = b.length() + 1; 180 if ((maxx >= sizex) || (maxy >= sizey)) { 181 sizex = maxx + 8; 182 sizey = maxy + 8; 183 net = new int[sizex][sizey]; 184 way = new int[sizex][sizey]; 185 } 186 187 /* 188 * clear the network 189 */ 190 for (x = 0; x < maxx; x++) { 191 for (y = 0; y < maxy; y++) { 192 net[x][y] = 0; 193 } 194 } 195 196 /* 197 * set known persistent values 198 */ 199 for (x = 1; x < maxx; x++) { 200 net[x][0] = x; 201 way[x][0] = X; 202 } 203 for (y = 1; y < maxy; y++) { 204 net[0][y] = y; 205 way[0][y] = Y; 206 } 207 208 for (x = 1; x < maxx; x++) { 209 for (y = 1; y < maxy; y++) { 210 go[X] = net[x - 1][y] + DELETE; 211 // way on x costs 1 unit 212 go[Y] = net[x][y - 1] + INSERT; 213 // way on y costs 1 unit 214 go[R] = net[x - 1][y - 1] + REPLACE; 215 go[D] = net[x - 1][y - 1] 216 + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100); 217 // diagonal costs 0, when no change 218 short min = D; 219 if (go[min] >= go[X]) { 220 min = X; 221 } 222 if (go[min] > go[Y]) { 223 min = Y; 224 } 225 if (go[min] > go[R]) { 226 min = R; 227 } 228 way[x][y] = min; 229 net[x][y] = (short) go[min]; 230 } 231 } 232 233 // read the patch string 234 StringBuilder result = new StringBuilder(); 235 final char base = 'a' - 1; 236 char deletes = base; 237 char equals = base; 238 for (x = maxx - 1, y = maxy - 1; x + y != 0;) { 239 switch (way[x][y]) { 240 case X: 241 if (equals != base) { 242 result.append("-" + (equals)); 243 equals = base; 244 } 245 deletes++; 246 x--; 247 break; 248 // delete 249 case Y: 250 if (deletes != base) { 251 result.append("D" + (deletes)); 252 deletes = base; 253 } 254 if (equals != base) { 255 result.append("-" + (equals)); 256 equals = base; 257 } 258 result.append('I'); 259 result.append(b.charAt(--y)); 260 break; 261 // insert 262 case R: 263 if (deletes != base) { 264 result.append("D" + (deletes)); 265 deletes = base; 266 } 267 if (equals != base) { 268 result.append("-" + (equals)); 269 equals = base; 270 } 271 result.append('R'); 272 result.append(b.charAt(--y)); 273 x--; 274 break; 275 // replace 276 case D: 277 if (deletes != base) { 278 result.append("D" + (deletes)); 279 deletes = base; 280 } 281 equals++; 282 x--; 283 y--; 284 break; 285 // no change 286 } 287 } 288 if (deletes != base) { 289 result.append("D" + (deletes)); 290 deletes = base; 291 } 292 293 return result.toString(); 294 } 295 }